%reset -f
%matplotlib inline
%config InlineBackend.figure_format = 'retina' # High-res graphs (rendered irrelevant by svg option below)
%config InlineBackend.print_figure_kwargs = {'bbox_inches':'tight'} # No extra white space
%config InlineBackend.figure_format = 'svg' # 'png' is default
import warnings
warnings.filterwarnings('ignore') # Because we are adults
from IPython.core.debugger import set_trace
import altair as alt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# iPyPublish imports
# from ipypublish.scripts.ipynb_latex_setup import *
# from IPython.display import SVG, display, Markdown
Matplotlib is the bread and butter of plotting in Python. See here and Ben Deverett's excellent notebook for tutorials.
with plt.xkcd():
    # Based on "Stove Ownership" from XKCD by Randall Munroe
    # http://xkcd.com/418/

    fig = plt.figure()
    ax = fig.add_axes((0.1, 0.2, 0.8, 0.7))

    # Keep only the left and bottom spines and drop every tick mark.
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')
    plt.xticks([])
    plt.yticks([])
    ax.set_ylim([-30, 10])

    # Constant at 1 up to index 70, then dropping by one more unit at
    # every following sample (1, 0, -1, ..., -28).
    data = np.ones(100)
    data[70:] -= np.arange(30)

    # Arrow pointing at the sample where the decline starts.
    plt.annotate(
        'THE DAY I REALIZED\nI COULD COOK BACON\nWHENEVER I WANTED',
        xy=(70, 1), arrowprops=dict(arrowstyle='->'), xytext=(15, -10))

    plt.plot(data)

    plt.xlabel('time')
    plt.ylabel('my overall health')

    # Attribution line centred below the axes (figure coordinates).
    fig.text(
        0.5, 0.05,
        '"Stove Ownership" from xkcd by Randall Munroe',
        ha='center')
with plt.xkcd():
    # Based on "The Data So Far" from XKCD by Randall Munroe
    # http://xkcd.com/373/

    fig = plt.figure()
    ax = fig.add_axes((0.1, 0.2, 0.8, 0.7))

    # Two bars of width 0.25: height 0 at x=0 and height 100 at x=1.
    ax.bar([0, 1], [0, 100], 0.25)

    # Keep only the left and bottom spines; x ticks go on the bottom edge.
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')
    ax.xaxis.set_ticks_position('bottom')

    ax.set_xticks([0, 1])
    ax.set_xlim([-0.5, 1.5])
    ax.set_ylim([0, 110])
    ax.set_xticklabels(['CONFIRMED BY\nEXPERIMENT', 'REFUTED BY\nEXPERIMENT'])
    plt.yticks([])

    plt.title("CLAIMS OF SUPERNATURAL POWERS")

    # Attribution line; the negative y puts it below the figure's lower edge.
    fig.text(
        0.5, -0.05,
        '"The Data So Far" from xkcd by Randall Munroe',
        ha='center')

plt.show()
Seaborn is a wrapper around Matplotlib that makes it easier to plot attractive figures.
# Three ways of describing a palette: a palette object built from a name,
# a bare palette name, and an explicit list of hex colours.
pal = sns.color_palette("husl", n_colors=8)  # optionally set number of colors
flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]

# Activate each palette in turn and draw the colours that are now current.
for palette_spec in (pal, "tab10", flatui):
    sns.set_palette(palette_spec)
    sns.palplot(sns.color_palette())
You can generate these with the wes Python package.
That said, installation can be a little annoying, because you will often get an error about a missing colors.json file. If you get that error, simply download the tarball of the latest version of the package, extract colors.json, and place it in the appropriate location (i.e., where the error message says it cannot be found).
import wes
# Ask the package which palettes it provides.
# NOTE(review): show=True presumably also draws each palette -- confirm against the wes docs.
wes.available(show=True)
# wes.set_palette('Darjeeling')
sns.set(style="ticks")  # overwrites color palette

# Anscombe's quartet, loaded from seaborn's example datasets
anscombe = sns.load_dataset("anscombe")

# And of course, you can combine it with xkcd style if you want
with plt.xkcd():
    # One linear-regression panel per dataset, two panels per row
    ax = sns.lmplot(
        data=anscombe,
        x="x",
        y="y",
        hue="dataset",
        col="dataset",
        col_wrap=2,
        height=4,  # palette=pal,
        ci=None,
        scatter_kws=dict(s=50, alpha=1),
    )
    # Change axis labels; the semi-colon suppresses the non-graph output
    ax.set(xlabel="X", ylabel="Y");
By default, the bar plot shows the 95% confidence interval.
# Load seaborn's "tips" example dataset; later plots in this file reuse it.
tips = sns.load_dataset("tips")
# Bar plot of total_bill for each day; capsize=0.1 is passed for the error-bar caps.
ax = sns.barplot(x="day", y="total_bill", data=tips, capsize=0.1)
wes.set_palette("Darjeeling")  # change the color scheme

# Create the figure at A4 paper size (width x height in inches)
fig, ax = plt.subplots(figsize=(11.7, 8.27))

# Violins first (inner=None leaves their interior empty), then the
# individual observations drawn over them in white
ax = sns.violinplot(data=tips, x="day", y="total_bill", inner=None, ax=ax)
ax = sns.swarmplot(data=tips, x="day", y="total_bill", color="white", ax=ax)

ax.set(xlabel="Day of the Week", ylabel="Total Bill in $");
def set_titles(thisPlot, titleList, fontSize):
    """Give each panel of a multi-panel plot its own title.

    Args:
        thisPlot: Object whose ``axes`` attribute is an array of axes
            (for example the grid returned by ``sns.catplot``).
        titleList: Titles, matched to the axes in flattened order. Pairing
            stops at whichever of the two sequences is shorter.
        fontSize: Forwarded to each axes' ``set_title`` as ``fontsize``.
    """
    panels = thisPlot.axes.flat
    for panel, heading in zip(panels, titleList):
        panel.set_title(heading, fontsize=fontSize)
def set_labels(thisPlot, xLabel, yLabel, fontSize):
    """Set the x and y axis labels of a grid plot in one call.

    Args:
        thisPlot: Object providing ``set_xlabels`` and ``set_ylabels``
            (for example the grid returned by ``sns.catplot``).
        xLabel: Text for the x axis label.
        yLabel: Text for the y axis label.
        fontSize: Forwarded to both setters as ``fontsize``.
    """
    text_style = {"fontsize": fontSize}
    thisPlot.set_xlabels(xLabel, **text_style)
    thisPlot.set_ylabels(yLabel, **text_style)
def set_xtick_labels(thisPlot, tickList, fontSize):
    """Replace the x tick labels of a grid plot.

    Args:
        thisPlot: Object providing ``set_xticklabels`` (for example the
            grid returned by ``sns.catplot``).
        tickList: New tick labels, passed through unchanged.
        fontSize: Forwarded to ``set_xticklabels`` as ``fontsize``.
    """
    tick_style = {"fontsize": fontSize}
    thisPlot.set_xticklabels(tickList, **tick_style)
def set_legend(thisPlot, legendEntries, fontSize):
    """Attach a legend to the last panel of the first row and relabel it.

    The legend is anchored just outside the top-right corner of that panel.

    Args:
        thisPlot: Object whose ``axes`` attribute is an array of axes
            (for example the grid returned by ``sns.catplot``). Both 2-D
            grids and 1-D (wrapped) axes arrays are accepted.
        legendEntries: Replacement texts for the legend labels, in order.
            If there are fewer entries than labels, the remaining labels
            keep their original text.
        fontSize: Forwarded to ``legend`` as ``fontsize``.
    """
    # The last column of the first row sits at flat index ``ncols - 1``.
    # Using the size of the last dimension (rather than the largest one)
    # keeps the index valid when the grid has more rows than columns, and
    # going through ``.flat`` also handles a 1-D axes array.
    last_column = thisPlot.axes.shape[-1] - 1
    target_axes = thisPlot.axes.flat[last_column]

    # format the legend, placing it outside the axes
    target_axes.legend(
        bbox_to_anchor=(1.05, 1), loc=2, fontsize=fontSize, borderaxespad=0.0
    )
    legend = target_axes.get_legend()

    for label, entry in zip(legend.get_texts(), legendEntries):
        label.set_text(entry)
sns.set_palette("tab10")

# Make plots -- many of these arguments are optional.
# Everything the two plots have in common is collected once.
facet_kwargs = dict(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
    col="time",
    height=5,
    aspect=1,
    legend=False,
)
barPlot = sns.catplot(kind="bar", **facet_kwargs)
beeswarmPlot = sns.catplot(kind="swarm", dodge=True, **facet_kwargs)

# Format them nicely!
# Axis labels
xLabel = ""  # "Day"
yLabel = "Total Bill"
# Titles
title_list = ["Lunch", "Dinner"]
titles = [name.title() for name in title_list]  # ["Bimodal", "Normal", "Skewed"]
# X axis tick labels or category labels
x_tick_labels = ["Thursday", "Friday", "Saturday", "Sunday"]
# Legend texts
legendEntries = ["Male", "Female"]

# Apply identical formatting to both plots
for facetPlot in (barPlot, beeswarmPlot):
    set_labels(facetPlot, xLabel, yLabel, 20)
    set_titles(facetPlot, titles, 30)
    set_xtick_labels(facetPlot, x_tick_labels, 15)
    set_legend(facetPlot, legendEntries, 15)

# Save plots
# barPlot.savefig("barPlot.svg") # can also use other extensions, like .png
# beeswarmPlot.savefig("beePlot.svg")
Altair is a declarative plotting library with a lot of useful chart types. The examples below are taken from here.
# Scatter plot of the "cars" example dataset from vega_datasets.
from vega_datasets import data
source = data.cars()
# Circles positioned by Horsepower vs. Miles_per_Gallon and coloured by Origin;
# the tooltip lists the fields to show for a point, and .interactive() is
# chained on the finished chart.
alt.Chart(source).mark_circle(size=60).encode(
    x="Horsepower",
    y="Miles_per_Gallon",
    color="Origin",
    tooltip=["Name", "Origin", "Horsepower", "Miles_per_Gallon"],
).interactive()
# Grid of scatter plots over the "cars" dataset: the same chart spec is
# repeated for every row/column combination of the fields listed below.
from vega_datasets import data
source = data.cars()
alt.Chart(source).mark_circle().encode(
    # alt.repeat(...) is a placeholder filled in by .repeat() below
    alt.X(alt.repeat("column"), type="quantitative"),
    alt.Y(alt.repeat("row"), type="quantitative"),
    color="Origin:N",
).properties(width=150, height=150).repeat(
    row=["Horsepower", "Acceleration", "Miles_per_Gallon"],
    column=["Miles_per_Gallon", "Acceleration", "Horsepower"],
).interactive()
import pandas as pd
import altair as alt
import numpy as np

# Fixed seed so the random draws below are reproducible
np.random.seed(42)

# Generating Data: 1000 normally distributed values per trial
source = pd.DataFrame(
    {
        "Trial A": np.random.normal(0, 0.8, 1000),
        "Trial B": np.random.normal(-2, 1, 1000),
        "Trial C": np.random.normal(3, 2, 1000),
    }
)

# Tidying Data: wide -> long, one (Experiment, Measurement) pair per row
source = source.melt(var_name="Experiment", value_name="Measurement")

# One semi-transparent step histogram per experiment, drawn on shared axes
measurement_bins = alt.X("Measurement", bin=alt.Bin(maxbins=100))
unstacked_counts = alt.Y("count()", stack=None)
experiment_colors = alt.Color(
    "Experiment", scale=alt.Scale(range=["#0000ff", "#008000", "#ff0000"])
)
alt.Chart(source).mark_area(opacity=0.3, interpolate="step").encode(
    measurement_bins,
    unstacked_counts,
    experiment_colors,
)
Bokeh is an interactive visualization library that leverages JavaScript. See here for a video tutorial and here for a notebook with various visualizations, including some made with Bokeh.
from bokeh.plotting import figure, output_notebook, show

# Scatter plot of tip against total bill for the whole tips dataset.
this_plot = figure(width=600, height=600)
# scatter() draws circle markers by default. It is used instead of
# circle(size=...) because recent Bokeh releases deprecate sized circle()
# calls, while scatter() is available in old and new versions alike.
this_plot.scatter(x=tips["total_bill"], y=tips["tip"], size=10, alpha=0.7)
output_notebook()  # to output inline
show(this_plot)
Let's plot a scatterplot of tip amount vs. total bill, separately for men and women.
from bokeh.plotting import figure, output_notebook, show, ColumnDataSource
import bokeh.models.tools as tools

# Get relevant subsets of data
male_data = tips[tips["sex"] == "Male"]
female_data = tips[tips["sex"] == "Female"]

# Convert to format bokeh understands
source_male = ColumnDataSource(male_data)
source_female = ColumnDataSource(female_data)

# Set up figure
this_plot = figure(width=600, height=600)

# One glyph per group. `legend_label` replaces the old `legend` keyword,
# which newer Bokeh releases no longer accept, and scatter() replaces the
# deprecated sized circle() call (circle is scatter's default marker).
for group_source, group_color, group_label in (
    (source_male, "teal", "Men"),
    (source_female, "darkorange", "Women"),
):
    this_plot.scatter(
        source=group_source,
        x="total_bill",
        y="tip",
        color=group_color,
        size=10,
        alpha=0.7,
        legend_label=group_label,
    )

# Set axis labels
this_plot.xaxis.axis_label = "Total Bill"
this_plot.yaxis.axis_label = "Tip Amount"

# Show information when hovering the mouse over datapoints
this_plot.add_tools(tools.HoverTool(tooltips=[("Day", "@day")]))  # @ chooses feature

# Hide all markers of a given category when clicked in legend
this_plot.legend.click_policy = "hide"

output_notebook()
show(this_plot)
# Hand the tips DataFrame to pivottablejs' pivot_ui.
# NOTE(review): presumably this renders an interactive pivot table in the notebook -- confirm.
from pivottablejs import pivot_ui
pivot_ui(tips)
Dash/Plotly is another package for producing really nice and interactive graphs, but it requires signing up for an account to initialize it. After initialization you can use it online by default (which means all of your graphs get saved to the cloud for everyone to see forever) or you can use it offline (as demoed below). Examples taken or modified from here.
I'm not familiar with the Dash API that has recently been introduced, nor have I really explored using Plotly. I've been able to get everything I need done in Matplotlib/Seaborn, so be aware that the code snippets below may no longer work with recent versions of the Plotly package (which seems to be a different thing from Dash).
When I first tried using plotly I sometimes got IOPub data rate exceeded errors. Here's how you fix that:
Run jupyter notebook --generate-config to generate a clean configuration file with all parameters commented out.
In that configuration file, set c.NotebookApp.iopub_data_rate_limit and c.NotebookApp.iopub_msg_rate_limit to be some absurdly large numbers.
import plotly
# Import the package here so that this cell runs on its own.
import plotly

# initialize with your credentials -- only need to do once ever in life,
# not even once per notebook.
# plotly.tools.set_credentials_file(username='XXX', api_key='XXX')

from plotly.graph_objs import Scatter, Layout

# Offline mode: the figure is rendered inside the notebook.
plotly.offline.init_notebook_mode(connected=True)

# Minimal figure: one scatter trace plus a title.
plotly.offline.iplot({
    "data": [Scatter(x=[1, 2, 3, 4], y=[4, 3, 2, 1])],
    "layout": Layout(title="hello world")
})
import plotly.offline as py
import plotly.figure_factory as ff

# Read the example CSV and turn the DataFrame into a plotly table figure.
df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/school_earnings.csv")
table = ff.create_table(df)

# Keep backslashes out of the name: inside a normal string literal "\t" is
# a TAB character, so a name written with "\table1" would contain a tab.
py.iplot(table, filename='plotly_table1')
import plotly.offline as py
# The star import provides the graph-object names used unqualified further
# down in this file (Bar, Layout, Figure, Surface, ...).
from plotly.graph_objs import *
# Single bar trace: the Gap column for each School.
data = [Bar(x=df.School,
            y=df.Gap)]
py.iplot(data)
# One bar trace per column, each with its own legend name and fill colour
trace_women = Bar(
    name='Women',
    x=df.School,
    y=df.Women,
    marker={'color': '#ffcdd2'},
)
trace_men = Bar(
    name='Men',
    x=df.School,
    y=df.Men,
    marker={'color': '#A2D5F2'},
)
trace_gap = Bar(
    name='Gap',
    x=df.School,
    y=df.Gap,
    marker={'color': '#59606D'},
)
data = [trace_women, trace_men, trace_gap]

# Figure title and axis titles
layout = Layout(
    title="Average Earnings for Graduates",
    xaxis={'title': 'School'},
    yaxis={'title': 'Salary (in thousands)'},
)

fig = Figure(data=data, layout=layout)
py.iplot(fig)
# One sine trace per frequency; all traces share the same x samples, so
# the sample array is built once instead of twice per trace.
x_values = np.arange(0, 10, 0.01)
data = [
    dict(
        visible=False,
        # Hex colours need the leading '#'
        line=dict(color='#00CED1', width=6),
        # One decimal place keeps float noise (e.g. 0.30000000000000004)
        # out of the trace names
        name=f'𝜈 = {step:.1f}',
        x=x_values,
        y=np.sin(step * x_values),
    )
    for step in np.arange(0, 5, 0.1)
]
# Trace shown before the slider is touched; matches `active` below.
data[10]['visible'] = True

# One slider step per trace: each step restyles `visible` so that only
# its own trace is shown.
steps = []
for i in range(len(data)):
    visibility = [False] * len(data)
    visibility[i] = True  # Toggle i'th trace to "visible"
    steps.append(dict(method='restyle', args=['visible', visibility]))

sliders = [dict(
    active=10,
    currentvalue={"prefix": "Frequency: "},
    pad={"t": 50},
    steps=steps,
)]

layout = dict(sliders=sliders)
fig = dict(data=data, layout=layout)

py.iplot(fig)
# Parameter samples for the surface
s = np.linspace(0, 2 * np.pi, 240)
t = np.linspace(0, np.pi, 240)
# NOTE(review): np.meshgrid(s, t) returns the grid of `s` values first, so
# `tGrid` actually spans [0, 2*pi] and `sGrid` spans [0, pi]. The names
# look swapped; behaviour is left unchanged here -- confirm the intent.
tGrid, sGrid = np.meshgrid(s, t)

r = 2 + np.sin(7 * sGrid + 5 * tGrid)  # r = 2 + sin(7s+5t)
x = r * np.cos(sGrid) * np.sin(tGrid)  # x = r*cos(s)*sin(t)
y = r * np.sin(sGrid) * np.sin(tGrid)  # y = r*sin(s)*sin(t)
z = r * np.cos(tGrid)                  # z = r*cos(t)

surface = Surface(x=x, y=y, z=z)
# A plain list of traces and plain dicts for the scene replace the
# deprecated Data / Scene / XAxis / YAxis / ZAxis wrapper classes.
data = [surface]

# All three scene axes share the same look: white grid and zero lines on
# a light grey background.
axis_style = dict(
    gridcolor='rgb(255, 255, 255)',
    zerolinecolor='rgb(255, 255, 255)',
    showbackground=True,
    backgroundcolor='rgb(230, 230,230)',
)

layout = Layout(
    title='Parametric Plot',
    scene=dict(
        xaxis=dict(axis_style),
        yaxis=dict(axis_style),
        zaxis=dict(axis_style),
    ),
)

fig = Figure(data=data, layout=layout)
py.iplot(fig)
# The star import provides ggplot, aes and geom_point used below.
from plotnine import *
# Load seaborn's "iris" example dataset; the pair plot at the end of the file reuses it.
iris = sns.load_dataset("iris")
# Scatter plot of petal length against sepal length, coloured by species.
# The parentheses let the "+"-chained layers span several lines.
(
    ggplot(iris)
    + aes(x="sepal_length", y="petal_length", colour="species")
    + geom_point()
)
As you can see, there is no shortage of powerful visualization options in Python. That said, I'm still partial to seaborn and matplotlib.
# Pair plot of the iris data, coloured by species; the trailing semi-colon
# suppresses the non-graph output.
sns.pairplot(iris, hue="species");